# ==============================================================================
# file name: 2-recode-weight-surveys.R
# author: Bernhard Clemm / Tiago Ventura
# date: Sep 15, 2023
# purpose: recode surveys and compute weights for donor subsample
# ==============================================================================

rm(list = ls())

source("code/utils/constants.R")
source("code/utils/survey-recoding-LU.R")

# FACEBOOK =====================================================================

# Raw Facebook survey table; the recodes below read its q3_* items.
survey_fb_all <- read.csv("data/surveys_raw/facebook/fb_people_table.csv")

## Sociodemographics ####

# Recode the raw sociodemographic items into the variables used for weighting
# (age_group, edu_cat, race_cat, female) and for analysis (age, age_high,
# edu_high, white). Values matched by no case_when() branch become NA.
survey_fb_all <- survey_fb_all %>%
  mutate(
    age = q3_7_age,
    # Collapse the raw age codes into brackets. case_when() takes the first
    # matching branch, so code 11 must not appear in the bracket-6 set or the
    # top bracket (7) can never be assigned. Code 11 is the one that age_high
    # labels "65 or more".
    age_group = case_when(
      q3_7_age == 1 ~ NA_real_,
      q3_7_age == 2 ~ 2,
      q3_7_age %in% c(3, 4) ~ 3,
      q3_7_age %in% c(5, 6) ~ 4,
      q3_7_age %in% c(7, 8) ~ 5,
      q3_7_age %in% c(9, 10) ~ 6,
      q3_7_age == 11 ~ 7
    ),
    age_high = case_when(
      age == 11 ~ "65 or more",
      age < 11 ~ "less than 65"
    ),
    female = case_when(
      q3_5_gender == 0 ~ 0,
      q3_5_gender == 1 ~ 1
    ),
    edu_high = case_when(
      q3_6_edu < 5 ~ 0,
      q3_6_edu >= 5 ~ 1
    ),
    # Raw code 2 gets its own category. Matching c(1, 2) in the first branch
    # would shadow the `== 2` branch and leave category 2 empty.
    # NOTE(review): assumes the explicit `== 2 ~ 2` branch reflects the
    # intended scheme -- confirm against the codebook.
    edu_cat = case_when(
      q3_6_edu == 1 ~ 1,
      q3_6_edu == 2 ~ 2,
      q3_6_edu == 3 ~ 3,
      q3_6_edu %in% c(4, 5) ~ 4,
      q3_6_edu %in% c(6, 7) ~ 5
    ),
    race_cat = case_when(
      q3_8_race %in% c(6) ~ 1,
      q3_8_race %in% c(5) ~ 2,
      q3_8_race %in% c(1, 2, 3, 4) ~ 3
    ),
    white = case_when(
      q3_8_race %in% c(5, 6) ~ 1,
      q3_8_race %in% c(1, 2, 3, 4) ~ 0
    )
  )

## Survey participation / donor status ####

# Unique respondent ids that appear in the summarized Facebook browsing data.
people_visits_fb <- distinct(
  read.csv("data/browsing_summarized/people_visits_FB.csv"),
  person_id
)

# Full join: ids found only in the browsing data are kept as extra rows whose
# survey columns are all NA.
survey_fb_all <- full_join(
  survey_fb_all, people_visits_fb,
  by = c("id" = "person_id")
)

# Some participants did not do the survey due to a technical error; these are
# donors. Their participated_1 is NA (no survey row), so NA counts as donor.

survey_fb_all <- survey_fb_all %>%
  mutate(
    participated_1 = startdate_s != "",
    participated_2 = startdate_2 != "",
    donor = ifelse(is.na(participated_1) | participated_1, 1, 0)
  )

write.csv(
  survey_fb_all,
  file = "data/surveys_processed/survey_FB_all.csv",
  row.names = FALSE
)

## Construct weights for whole sample ####

### get targets ####
# "For the Facebook sample, we base our weights on age, gender, education, and ethnicity"

# Target margins (in percent) are the category shares observed in the full
# sample; table() leaves NA out of the shares.
survey_fb_perc_age <- 100 * prop.table(table(survey_fb_all$age_group))
survey_fb_perc_female <- 100 * prop.table(table(survey_fb_all$female))
survey_fb_perc_edu_cat <- 100 * prop.table(table(survey_fb_all$edu_cat))
survey_fb_perc_race <- 100 * prop.table(table(survey_fb_all$race_cat))

# List names must equal the factor column names created for the donors below.
survey_fb_targets <- list(
  age_group_chr = survey_fb_perc_age,
  female_chr = survey_fb_perc_female,
  edu_cat_chr = survey_fb_perc_edu_cat,
  race_cat_chr = survey_fb_perc_race
)

### subset to donors ####

# Keep donors only and add a factor copy (suffix "_chr") of each raking
# variable.
survey_fb_donors <- as.data.frame(survey_fb_all)
survey_fb_donors <- filter(survey_fb_donors, donor == 1)
survey_fb_donors <- mutate(
  survey_fb_donors,
  across(
    c(age_group, female, edu_cat, race_cat),
    as.factor,
    .names = "{col}_chr"
  )
)

survey_fb_to_match <- select(
  survey_fb_donors,
  id, age_group_chr, female_chr, edu_cat_chr, race_cat_chr
)

### Construct weights ####

# Rake the donor subsample to the full-sample margins.
survey_fb_raking <- anesrake(
  inputter = survey_fb_targets,
  dataframe = survey_fb_to_match,
  caseid = survey_fb_to_match$id,
  type = "nolim",
  verbose = FALSE
)

# The weight vector is named by case id; names() yields the ids as character.
survey_fb_weights <- data.frame(
  id = names(survey_fb_raking$weightvec),
  weight = survey_fb_raking$weightvec
)

## Recode other variables for donor sample ####

### Political outcomes ####

survey_fb_donors <- survey_fb_donors %>%
  mutate(
    # Ideology, waves 1 and 2
    ideo_1 = q5_22_ideology_1,
    ideo_2 = q5_22_ideology_2,
    # Party id: party1 where present, otherwise party_affil
    party_1 = ifelse(is.na(party1), party_affil, party1),
    party_bin = case_when(
      party_1 > 4 ~ "Republican",
      party_1 < 4 ~ "Democrat"
    ),
    # Affective polarization: thermometer towards the other party.
    # Democrats (party_1 < 4) take thermometer item 4, Republicans
    # (party_1 > 4) take item 1; party_1 == 4 stays NA.
    ft_outparty_1 = case_when(
      party_1 < 4 ~ q5_2_feeling_thermom_4_1,
      party_1 > 4 ~ q5_2_feeling_thermom_1_1
    ),
    ft_outparty_2 = case_when(
      party_1 < 4 ~ q5_2_feeling_thermom_4_2,
      party_1 > 4 ~ q5_2_feeling_thermom_1_2
    ),
    # Political interest, waves 1 and 2
    int_politics_1 = q3_2_pol_interest_1,
    int_politics_2 = q3_2_pol_interest_2,
    # Following politics, waves 1 and 2
    foll_politics_1 = q3_3_follow_pol_1,
    foll_politics_2 = q3_3_follow_pol_2
  ) %>%
  # Rescale to 0-1: interest/following from a 1-7 range, ideology from 0-10
  mutate(
    across(
      c(int_politics_1, int_politics_2, foll_politics_1, foll_politics_2),
      function(x) (x - 1) / 6
    ),
    across(c(ideo_1, ideo_2), function(x) x / 10)
  ) %>%
  # Recoding for between-wave analysis: value 7 on the "accidental" items
  # becomes NA
  mutate(
    across(contains("accidental"), function(x) ifelse(x == 7, NA, x))
  )

### Quality outcomes ####

survey_fb_donors <- survey_fb_donors %>%
  rename(duration_1 = duration_s) %>%
  mutate(
    # Straightlining: all five gun-policy items share one answer, and that
    # answer is not 4
    straightliner_1 = ifelse(
      q5_6_prevent_gun_v_q5_6_more_carry_1 ==
        q5_6_prevent_gun_v_q5_6_mental_health_1 &
        q5_6_prevent_gun_v_q5_6_mental_health_1 ==
          q5_6_prevent_gun_v_q5_6_more_laws_1 &
        q5_6_prevent_gun_v_q5_6_more_laws_1 == q5_6_prevent_gun_v_q5_6_21_1 &
        q5_6_prevent_gun_v_q5_6_21_1 == q5_6_prevent_gun_v_q5_6_teachers_1 &
        q5_6_prevent_gun_v_q5_6_more_carry_1 != 4,
      1, 0
    ),
    # Speeding (alternative measures): flag durations at least 30/40/50
    # percent below the sample median
    duration_1_median = median(duration_1, na.rm = TRUE),
    duration_median_30_1 = ifelse(duration_1 < 0.7 * duration_1_median, 1, 0),
    duration_median_40_1 = ifelse(duration_1 < 0.6 * duration_1_median, 1, 0),
    duration_median_50_1 = ifelse(duration_1 < 0.5 * duration_1_median, 1, 0)
  )

## Join weights ####

# Attach the raking weights by respondent id. The key is named explicitly so
# the join cannot silently pick up other shared column names.
survey_fb_donors <- left_join(survey_fb_donors, survey_fb_weights, by = "id")

## Subset to used variables ####

# Keep ids, participation flags, weight, the recoded demographics and
# outcomes, the quality flags, and the raw variables listed in vars_w1_fb /
# vars_w2_fb (defined outside this file). Both waves of every two-wave outcome
# are exported, including foll_politics_2.
survey_fb_donors <- survey_fb_donors %>%
  select(
    "person_id" = w1id, participated_1, participated_2, donor, weight,
    age, female, edu_high, white, party_1, ideo_1, ideo_2, age_high, party_bin,
    ft_outparty_1, ft_outparty_2, int_politics_1, int_politics_2,
    foll_politics_1, foll_politics_2,
    straightliner_1, duration_1, duration_median_30_1, duration_median_40_1, duration_median_50_1,
    all_of(vars_w1_fb), all_of(vars_w2_fb), contains("submit")
  )

## Export ####
# Note that this processed survey data set contains participants that will be
# excluded because of the seven-day filter.

write.csv(survey_fb_donors, file = "data/surveys_processed/survey_FB_donors.csv", row.names = FALSE)

# LUCID ========================================================================

# Wave 0 arrives in two raw files (Lucid and Qualtrics). Both go through the
# same recode function and are then stacked.
survey_lu_w0a <- recode_US_w0(
  read.csv("data/surveys_raw/lucid/US_survey_w0_lucid_raw.csv")
)
survey_lu_w0b <- recode_US_w0(
  read.csv("data/surveys_raw/lucid/US_survey_w0_qualtrics_raw.csv")
)
survey_lu_w0 <- rbind(survey_lu_w0a, survey_lu_w0b)

# Waves 1-3, each recoded with its own function.
survey_lu_w1 <- recode_US_w1(
  read.csv("data/surveys_raw/lucid/US_survey_w1_raw.csv")
)
survey_lu_w2 <- recode_US_w2(
  read.csv("data/surveys_raw/lucid/US_survey_w2_raw.csv")
)
survey_lu_w3 <- recode_US_w3(
  read.csv("data/surveys_raw/lucid/US_survey_w3_raw.csv")
)

# Supplementary sociodemographics used later to back-fill wave-0 missings.
survey_lu_missings <- read.csv(
  "data/surveys_raw/lucid/us_missing_w0_sociodems.csv"
)

# Wave 1 is the base table; waves 2 and 3 are attached where available.
survey_lu_w1_w2_w3 <- survey_lu_w1 %>%
  left_join(survey_lu_w2, by = "person_id") %>%
  left_join(survey_lu_w3, by = "person_id")

## Sociodemographics ####

# Build the full Lucid sample: wave-0 recodes, back-filled sociodemographics,
# derived weighting/analysis variables, then the wave 1-3 answers (full join,
# so respondents present in only one source are kept).
survey_lu_all <- survey_lu_w0 %>%
  # Attach the supplementary sociodemographics under *_estimated names so they
  # do not clash with the wave-0 columns.
  left_join(., survey_lu_missings %>%
    rename(
      "gender_estimated" = gender_w0,
      "age_estimated" = age_w0,
      "ethn_estimated" = ethn_w0,
      "hisp_estimated" = hisp_w0,
      "edu_estimated" = edu_w0
    ) %>%
    # Shift the supplementary gender code down by one before it fills
    # gender_w0.
    mutate(gender_estimated = gender_estimated - 1),
  by = "person_id"
  ) %>%
  # Fill wave-0 values only where they are missing and an estimate exists.
  # NOTE(review): only the numeric *_w0 columns are filled here. female, white
  # and race_cat below are derived from gender_w0_fac / ethn_w0_fac, which this
  # step does not touch, so back-filled gender/ethnicity never reach them.
  # hisp_estimated is not used at all. Confirm whether this is intended.
  mutate(
    age_w0 = ifelse(
      is.na(age_w0) & !is.na(age_estimated),
      age_estimated, age_w0
    ),
    gender_w0 = ifelse(
      is.na(gender_w0) & !is.na(gender_estimated),
      gender_estimated, gender_w0
    ),
    ethn_w0 = ifelse(
      is.na(ethn_w0) & !is.na(ethn_estimated),
      ethn_estimated, ethn_w0
    ),
    edu_w0 = ifelse(
      is.na(edu_w0) & !is.na(edu_estimated),
      edu_estimated, edu_w0
    )
  ) %>%
  rename(
    "gender" = gender_w0_fac,
    "age" = age_w0,
    "edu" = edu_w0_fac,
    "ethnicity" = ethn_w0_fac
  ) %>%
  mutate(
    female = case_when(
      gender == "Male" ~ 0,
      gender == "Female" ~ 1
    ),
    age_high = case_when(
      age >= 65 ~ "65 or more",
      age < 65 ~ "less than 65"
    ),
    edu_high = case_when(
      edu_w0 < 12 ~ 0,
      edu_w0 >= 12 ~ 1
    ),
    white = case_when(
      ethnicity == "White" ~ 1,
      ethnicity %in% c(
        "Black", "Asian", "Native American", "Prefer to self-describe"
      ) ~ 0
    ),
    # Three distinct raking categories. The remaining groups get their own
    # code (2) instead of sharing code 1 with "White", which would collapse
    # race_cat to Black vs. everyone else.
    race_cat = case_when(
      ethnicity == "White" ~ 1,
      ethnicity == "Black" ~ 0,
      ethnicity %in% c("Asian", "Native American", "Prefer to self-describe") ~ 2
    ),
    edu_cat = case_when(
      edu_w0 %in% c(1, 5, 6) ~ 1,
      edu_w0 %in% c(8) ~ 2,
      edu_w0 %in% c(10) ~ 3,
      edu_w0 %in% c(9, 11) ~ 4,
      edu_w0 %in% c(12, 13) ~ 5,
      edu_w0 %in% c(14, 15) ~ 6
    ),
    # Age brackets; the lowest one includes 18-year-olds (>= 18), who would
    # otherwise fall through every branch and get NA.
    age_cat = case_when(
      age >= 18 & age < 25 ~ 2,
      age > 24 & age < 35 ~ 3,
      age > 34 & age < 45 ~ 4,
      age > 44 & age < 55 ~ 5,
      age > 54 & age < 65 ~ 6,
      age > 64 ~ 7
    ),
    ideo_cat = case_when(
      ideo_w0 > 5 ~ 1,
      ideo_w0 < 5 ~ 2,
      ideo_w0 == 5 ~ 3
    ),
    party_cat = case_when(
      party_w0 > 4 ~ 1,
      party_w0 < 4 ~ 2,
      party_w0 == 4 ~ 3
    ),
    party_bin = case_when(
      party_w0 > 4 ~ "Republican",
      party_w0 < 4 ~ "Democrat"
    )
  ) %>%
  full_join(., survey_lu_w1_w2_w3, by = "person_id")

## Survey participation / donor status ####

# all participants did surveys

survey_lu_all <- survey_lu_all %>%
  mutate(
    # A wave counts as taken when its start date is present
    participated_w1 = !is.na(start_date_us_w1),
    participated_w2 = !is.na(start_date_us_w2),
    participated_w3 = !is.na(start_date_us_w3),
    # Donor = took part in wave 1; any remaining NA is set to 0
    donor = ifelse(participated_w1, 1, 0),
    donor = ifelse(is.na(donor), 0, donor),
    # Coalesce id variable: fall back to the wave-0 response id
    person_id = ifelse(is.na(person_id), ResponseId_w0, person_id)
  )

write.csv(
  survey_lu_all,
  file = "data/surveys_processed/survey_LU_all.csv",
  row.names = FALSE
)

## Construct weights for whole sample ####

### get targets ####
# "... we base our weights on age, gender, education,...
# For Lucid and Yougov, we add ideology and partisanship."

# Target margins (in percent) are the category shares observed in the full
# sample; table() leaves NA out of the shares.
survey_lu_perc_age <- 100 * prop.table(table(survey_lu_all$age_cat))
survey_lu_perc_female <- 100 * prop.table(table(survey_lu_all$female))
survey_lu_perc_edu_cat <- 100 * prop.table(table(survey_lu_all$edu_cat))
survey_lu_perc_race_cat <- 100 * prop.table(table(survey_lu_all$race_cat))
survey_lu_perc_ideo <- 100 * prop.table(table(survey_lu_all$ideo_cat))
survey_lu_perc_party <- 100 * prop.table(table(survey_lu_all$party_cat))

# List names must equal the factor column names created for the donors below.
survey_lu_targets <- list(
  age_cat_chr = survey_lu_perc_age,
  female_chr = survey_lu_perc_female,
  edu_cat_chr = survey_lu_perc_edu_cat,
  race_cat_chr = survey_lu_perc_race_cat,
  ideo_cat_chr = survey_lu_perc_ideo,
  party_cat_chr = survey_lu_perc_party
)

### subset to donors ####

# Recompute the donor flag from wave-1 participation, keep donors only, and
# add a factor copy (suffix "_chr") of each raking variable.
survey_lu_donors <- mutate(
  survey_lu_all,
  donor = ifelse(participated_w1 == TRUE, 1, 0)
)
survey_lu_donors <- filter(as.data.frame(survey_lu_donors), donor == 1)
survey_lu_donors <- mutate(
  survey_lu_donors,
  across(
    c(age_cat, female, edu_cat, race_cat, ideo_cat, party_cat),
    as.factor,
    .names = "{col}_chr"
  )
)

survey_lu_to_match <- select(
  survey_lu_donors,
  person_id, age_cat_chr, female_chr, edu_cat_chr,
  race_cat_chr, ideo_cat_chr, party_cat_chr
)

### Construct weights ####

# Rake the donor subsample to the full-sample margins.
survey_lu_raking <- anesrake(
  inputter = survey_lu_targets,
  dataframe = survey_lu_to_match,
  caseid = survey_lu_to_match$person_id,
  type = "nolim",
  verbose = FALSE
)

# The weight vector is named by case id; names() yields the ids as character.
survey_lu_weights <- data.frame(
  person_id = names(survey_lu_raking$weightvec),
  weight = survey_lu_raking$weightvec
)

## Recode other variables for donor sample ####

### Political outcomes ####

survey_lu_donors <- survey_lu_donors %>%
  mutate(
    # Partisanship/ideology: use the wave-1 answer, falling back to wave 0
    party_w1 = ifelse(is.na(party_w1), party_w0, party_w1),
    ideo_w1 = ifelse(is.na(ideo_w1), ideo_w0, ideo_w1),
    # Affective polarization: thermometer towards the other party's
    # supporters; party_w1 == 4 stays NA
    ft_outparty_w1 = case_when(
      party_w1 < 4 ~ ft_rep_supporters_w1,
      party_w1 > 4 ~ ft_dem_supporters_w1
    ),
    # Political knowledge: mean of the four wave-3 items, ignoring NA
    polknow_w3 = rowMeans(
      select(
        .,
        polknow_pres_w3, polknow_majority_w3,
        polknow_senator_w3, polknow_spend_w3
      ),
      na.rm = TRUE
    ),
    # Rescale outcomes to 0-1: trust/interest from 1-7, ideology from 0-10
    across(c(trust_gov_w2, int_politics_w2), function(x) (x - 1) / 6),
    across(c(ideo_w1, ideo_w2, ideo_w3), function(x) x / 10)
  )

### Quality checks ####

survey_lu_donors <- survey_lu_donors %>%
  # Straightlining: identical answers on all five malvol items (taken before
  # items 4 and 5 are reverse-coded below), unless that answer is 4
  mutate(
    straightliner = ifelse(
      malvol_1_w1 == malvol_2_w1 &
        malvol_2_w1 == malvol_3_w1 &
        malvol_3_w1 == malvol_4_w1 &
        malvol_4_w1 == malvol_5_w1 &
        malvol_1_w1 != 4,
      1, 0
    )
  ) %>%
  rename(duration = meta_duration_w1) %>%
  # Speeding (alternative measures): flag durations at least 30/40/50 percent
  # below the sample median
  mutate(
    duration_median = median(duration, na.rm = TRUE),
    duration_median_30 = ifelse(duration < 0.7 * duration_median, 1, 0),
    duration_median_40 = ifelse(duration < 0.6 * duration_median, 1, 0),
    duration_median_50 = ifelse(duration < 0.5 * duration_median, 1, 0)
  ) %>%
  # Experiment
  # 2 experimental treatments:
  ## "It is important to be open to different points of view on political issues."
  ## Hypothesized effect on attribution of malevolence
  ## "Looking at news sources that present opposing political views is important to good citizenship."
  ## Hypothesized effect on perception of polarization
  mutate(
    norm_treatment_w1 = case_when(
      norm_treatment_w1 == "control" ~ 0,
      norm_treatment_w1 == "treat" ~ 1
    )
  ) %>%
  # Reverse-code items 4 and 5 (x -> 8 - x). This must stay a separate step:
  # the `.` in the next mutate() is the data as it leaves this one.
  mutate(
    malvol_4_w1 = 8 - malvol_4_w1,
    malvol_5_w1 = 8 - malvol_5_w1
  ) %>%
  # Scale scores: row means over the items, ignoring NA
  mutate(
    malvol_w1 = rowMeans(
      select(
        .,
        malvol_1_w1, malvol_2_w1, malvol_3_w1, malvol_4_w1, malvol_5_w1
      ),
      na.rm = TRUE
    ),
    perpol_w1 = rowMeans(
      select(., perpol_1_w1, perpol_2_w1, perpol_3_w1, perpol_4_w1),
      na.rm = TRUE
    )
  )

## Join weights ####

# Natural join: matches on every column name the two tables share.
survey_lu_donors <- survey_lu_donors %>%
  left_join(survey_lu_weights)

## Subset to used variables ####

# Keep ids, participation flags, weight, recoded demographics and outcomes,
# quality flags, the raw variables listed in vars_w1_lu / vars_w2_lu (defined
# outside this file), and the experiment variables.
survey_lu_donors <- select(
  survey_lu_donors,
  person_id, participated_w1, participated_w2, participated_w3, donor, weight,
  age, female, edu_high, white, party_w1, ideo_w1, age_high, party_bin,
  ft_outparty_w1, int_politics_w2, polknow_w3,
  straightliner, duration,
  duration_median_30, duration_median_40, duration_median_50,
  all_of(vars_w1_lu), all_of(vars_w2_lu),
  norm_treatment_w1, malvol_w1, perpol_w1
)

## Export ####
# Note that this processed survey data set contains participants that will be
# excluded because of the seven-day filter.

write.csv(
  survey_lu_donors,
  file = "data/surveys_processed/survey_LU_donors.csv",
  row.names = FALSE
)

# YOUGOV =======================================================================

# YouGov survey responses, plus a second file from which the survey duration
# is later computed.
survey_yg_all <- read_rds("data/surveys_raw/yougov/yg_survey_with_ids_anon_reduced.rds")
survey_yg_duration <- read_sav("data/surveys_raw/yougov/NYUU0010_w1w2_OUTPUT_anon_reduced.sav")

## Sociodemographics ####

# Derive the weighting variables (age_cat, female, edu_cat, race_cat,
# party_cat, ideo_cat) and the analysis variables. Values matched by no
# case_when() branch become NA unless a TRUE ~ fallback is given.
survey_yg_all <- survey_yg_all %>%
  rename("person_id" = id_anon) %>%
  mutate(
    age = 2018 - as.numeric(birthyr),
    # Age brackets; the lowest one includes 18-year-olds (>= 18), who would
    # otherwise fall through every branch and get NA.
    age_cat = case_when(
      age >= 18 & age < 25 ~ 2,
      age > 24 & age < 35 ~ 3,
      age > 34 & age < 45 ~ 4,
      age > 44 & age < 55 ~ 5,
      age > 54 & age < 65 ~ 6,
      age > 64 ~ 7
    ),
    age_high = case_when(
      age >= 65 ~ "65 or more",
      age < 65 ~ "less than 65"
    ),
    female = case_when(gender == 2 ~ 1, TRUE ~ 0),
    edu_cat = case_when(educ %in% c(1, 2) ~ 1, TRUE ~ educ),
    edu_high = case_when(educ %in% c(5, 6) ~ 1, TRUE ~ 0),
    white = ifelse(race == 1, 1, 0),
    race_cat = case_when(
      race == 1 ~ 1,
      race == 2 ~ 2,
      race == 3 ~ 3,
      race %in% c(4, 5, 6, 7, 8) ~ 4
    ),
    party = pid7,
    # Code 8 is treated as missing. case_when() takes the first matching
    # branch, so the `== 8` test has to come before `> 4`; otherwise code 8 is
    # classed as Republican.
    party_cat = case_when(
      party == 8 ~ NA_real_,
      party > 4 ~ 1,
      party < 4 ~ 2,
      party == 4 ~ 3
    ),
    party_bin = case_when(
      party == 8 ~ NA_character_,
      party > 4 ~ "Republican",
      party < 4 ~ "Democrat"
    ),
    # 998 is the missing code; otherwise rescale by 100
    ideo = case_when(w1_ideo_self == 998 ~ NA_real_, TRUE ~ w1_ideo_self / 100),
    ideo_cat = case_when(
      ideo > .5 ~ 1,
      ideo < .5 ~ 2,
      ideo == .5 ~ 3
    )
  )

## Survey participation / donor status ####
## For this dataset, donor status is defined by whether any browsing data submitted

# Per-person browsing summary; only the id and the number of active days are
# needed here.
people_browsing_yg <- read.csv("data/browsing_summarized/people_visits_YG_anon.csv") %>%
  rename("person_id" = id_anon) %>%
  select(person_id, n_days_active)

# Full join keeps survey-only and browsing-only people.
survey_yg_all <- survey_yg_all %>%
  full_join(., people_browsing_yg, by = "person_id") %>%
  mutate(
    # Donor = submitted any browsing data, i.e. at least one active day.
    # `>= 0` would be true for every non-missing value and so could not
    # separate people with zero active days. People absent from the browsing
    # file (NA) are non-donors.
    # NOTE(review): if every row of the browsing file has n_days_active >= 1,
    # this is equivalent to the previous behavior -- confirm.
    donor = ifelse(!is.na(n_days_active) & n_days_active > 0, 1, 0),
    participated_w1 = ifelse(!is.na(starttime), TRUE, FALSE),
    participated_w2 = ifelse(!is.na(starttime_w2), TRUE, FALSE)
  )

### Political outcomes
# Feeling thermometers: set the 998 missing code to NA, then build the
# out-party thermometer.
survey_yg_all <- survey_yg_all %>%
  mutate(
    # Each thermometer is cleaned on its own 998 code. Testing w1_therm_dem
    # when recoding w1_therm_rep would leave 998 in w1_therm_rep and blank
    # valid rep ratings wherever the dem rating was missing.
    w1_therm_dem = ifelse(w1_therm_dem == 998, NA_real_, w1_therm_dem),
    w1_therm_rep = ifelse(w1_therm_rep == 998, NA_real_, w1_therm_rep),
    # Democrats (party < 4) rate Republicans; Republicans (party > 4) rate
    # Democrats. Code 8 is excluded because party_cat treats it as missing.
    # Both inputs are already cleaned, so no further 998 recode is needed.
    ft_outparty = case_when(
      party < 4 ~ w1_therm_rep,
      party > 4 & party != 8 ~ w1_therm_dem
    )
  )

# function to combine randomization A and B
#' Combine the two randomized versions (A and B) of a question into one column
#'
#' @param data Data frame holding both versions.
#' @param var_a Unquoted column name of version A.
#' @param var_b Unquoted column name of version B.
#' @param name_new_var Unquoted name of the column to create.
#' @param missing Value in `var_a` meaning "not shown"; rows carrying it take
#'   their value from `var_b`.
#' @return `data` with the new column added. Rows where `var_a` is NA keep NA.
unify_a_b <- function(data, var_a, var_b, name_new_var, missing) {
  mutate(
    data,
    "{{name_new_var}}" := case_when(
      {{ var_a }} == missing ~ {{ var_b }},
      TRUE ~ {{ var_a }}
    )
  )
}

# political knowledge
survey_yg_all <- survey_yg_all %>%
  # Merge the A/B versions of the unemployment-rate item; 9 marks the version
  # a respondent was not shown
  unify_a_b(
    var_a = w1_knowledge_unemployment_rateA,
    var_b = w1_knowledge_unemployment_rateB,
    name_new_var = w1_knowledge_unemployment_rate,
    missing = 9
  ) %>%
  unify_a_b(
    var_a = w2_knowledge_unemployment_rateA,
    var_b = w2_knowledge_unemployment_rateB,
    name_new_var = w2_knowledge_unemployment_rate,
    missing = 9
  ) %>%
  # Score each wave-1 knowledge item as correct (1) or incorrect (0)
  mutate(
    pol_know1 = ifelse(w1_knowledge_chinese_tarrifs == 1, 1, 0),
    pol_know2 = ifelse(w1_knowledge_stock_market == 1, 1, 0),
    pol_know3 = ifelse(w1_knowledge_US_tarrifs == 1, 1, 0),
    pol_know4 = ifelse(w1_knowledge_unemployment_rate == 3, 1, 0)
  ) %>%
  # Per-respondent share of correct answers, ignoring unanswered items
  rowwise() %>%
  mutate(
    polknow = mean(
      c(pol_know1, pol_know2, pol_know3, pol_know4),
      na.rm = TRUE
    )
  ) %>%
  ungroup()

# following online news about politics
survey_yg_all <- survey_yg_all %>%
  # Merge the A/B versions; "99" marks the version a respondent was not shown
  unify_a_b(
    var_a = w1_online_news_freq,
    var_b = w1_online_news_freq_b,
    name_new_var = foll_politics,
    missing = "99"
  ) %>%
  unify_a_b(
    var_a = w1_internet_frequency,
    var_b = w1_internet_frequency_b,
    name_new_var = internet_usage,
    missing = "99"
  ) %>%
  # "98" is set to NA on both merged items
  mutate(
    across(
      c(foll_politics, internet_usage),
      function(x) ifelse(x == "98", NA, x)
    )
  ) %>%
  # Set missing values to NA, flip the direction of the scale, and scale to 0-1
  mutate(
    foll_politics = ifelse(foll_politics == 8, NA, 8 - foll_politics),
    foll_politics = rescale(foll_politics),
    newsint_rescale = ifelse(newsint %in% c(8, 7), NA, 7 - newsint),
    int_politics = rescale(newsint_rescale)
  )

write.csv(
  survey_yg_all,
  file = "data/surveys_processed/survey_YG_all.csv",
  row.names = FALSE
)

## Construct weights for whole sample ####

### get targets ####
# "... we base our weights on age, gender, education,...
# For Lucid and Yougov, we add ideology and partisanship."

# Target margins (in percent) are the category shares observed in the full
# sample; table() leaves NA out of the shares.
survey_yg_perc_age <- 100 * prop.table(table(survey_yg_all$age_cat))
survey_yg_perc_female <- 100 * prop.table(table(survey_yg_all$female))
survey_yg_perc_edu_cat <- 100 * prop.table(table(survey_yg_all$edu_cat))
survey_yg_perc_race_cat <- 100 * prop.table(table(survey_yg_all$race_cat))
survey_yg_perc_ideo <- 100 * prop.table(table(survey_yg_all$ideo_cat))
survey_yg_perc_party <- 100 * prop.table(table(survey_yg_all$party_cat))

# List names must equal the factor column names created for the donors below.
survey_yg_targets <- list(
  age_cat_chr = survey_yg_perc_age,
  female_chr = survey_yg_perc_female,
  edu_cat_chr = survey_yg_perc_edu_cat,
  race_cat_chr = survey_yg_perc_race_cat,
  ideo_cat_chr = survey_yg_perc_ideo,
  party_cat_chr = survey_yg_perc_party
)

### subset to donors ####

# Keep donors only and add a factor copy (suffix "_chr") of each raking
# variable.
survey_yg_donors <- filter(as.data.frame(survey_yg_all), donor == 1)
survey_yg_donors <- mutate(
  survey_yg_donors,
  across(
    c(age_cat, female, edu_cat, race_cat, ideo_cat, party_cat),
    as.factor,
    .names = "{col}_chr"
  )
)

survey_yg_to_match <- select(
  survey_yg_donors,
  person_id, age_cat_chr, female_chr, edu_cat_chr,
  race_cat_chr, ideo_cat_chr, party_cat_chr
)

### Construct weights ####

# Rake the donor subsample to the full-sample margins.
survey_yg_raking <- anesrake(
  inputter = survey_yg_targets,
  dataframe = survey_yg_to_match,
  caseid = survey_yg_to_match$person_id,
  type = "nolim",
  verbose = FALSE
)

# The weight vector is named by case id; names() yields the ids as character.
survey_yg_weights <- data.frame(
  person_id = names(survey_yg_raking$weightvec),
  weight = survey_yg_raking$weightvec
)

## Recode other variables for donor sample ####

### Quality checks ####

survey_yg_donors <- survey_yg_donors %>%
  # Give the five CC15_322 items readable names
  rename(
    w1_abortiona = CC15_322a,
    w1_abortionb = CC15_322b,
    w1_abortionc = CC15_322c,
    w1_abortiond = CC15_322d,
    w1_abortione = CC15_322e
  ) %>%
  # Straightlining: the same answer on all five items
  mutate(
    straightliner = ifelse(
      w1_abortiona == w1_abortionb &
        w1_abortionb == w1_abortionc &
        w1_abortionc == w1_abortiond &
        w1_abortiond == w1_abortione,
      1, 0
    )
  )

# Survey duration in seconds, from the start and end timestamps.
survey_yg_duration <- survey_yg_duration %>%
  select("person_id" = id_anon, contains("time")) %>%
  mutate(across(c(starttime, endtime), ~ as.POSIXct(.x, format = "%Y-%m-%d %H:%M:%OS"))) %>%
  mutate(duration = as.numeric(endtime - starttime, units = "secs"))

# na.rm = TRUE, as in the Facebook and Lucid sections: one missing or
# unparseable timestamp would otherwise turn the median, and with it every
# speeding flag below, into NA.
survey_yg_duration_median <- median(survey_yg_duration$duration, na.rm = TRUE)

# Speeding (alternative measures): flag durations at least 30/40/50 percent
# below the sample median.
survey_yg_duration <- survey_yg_duration %>%
  mutate(
    duration_median_30 = ifelse(duration < 0.7 * survey_yg_duration_median, 1, 0),
    duration_median_40 = ifelse(duration < 0.6 * survey_yg_duration_median, 1, 0),
    duration_median_50 = ifelse(duration < 0.5 * survey_yg_duration_median, 1, 0)
  ) %>%
  select(person_id, starts_with("duration"))

# person_id is the only key; it is named explicitly so the join cannot
# silently pick up other shared column names.
survey_yg_donors <- left_join(survey_yg_donors, survey_yg_duration, by = "person_id")

## Join weights ####

# The weights table stores ids as character (they come from names()), so the
# donor ids are converted to match before joining.
survey_yg_donors <- mutate(
  survey_yg_donors,
  person_id = as.character(person_id)
)
# Remove the browsing variable, as it will be joined again later.
survey_yg_donors <- select(survey_yg_donors, -n_days_active)
# Natural join: matches on every column name the two tables share.
survey_yg_donors <- survey_yg_donors %>%
  left_join(survey_yg_weights)

## Subset to used variables ####

# Keep ids, participation flags, weight, recoded demographics and outcomes,
# quality flags, and the raw variables listed in vars_w1_yg / vars_w2_yg
# (defined outside this file).
survey_yg_donors <- select(
  survey_yg_donors,
  person_id, donor, weight, endtime_w2, participated_w1, participated_w2,
  age, female, edu_high, white, party, ideo, age_high, party_bin,
  ft_outparty, foll_politics, polknow, int_politics,
  straightliner, duration,
  duration_median_30, duration_median_40, duration_median_50,
  all_of(vars_w1_yg), all_of(vars_w2_yg)
)

## Export ####
# Note that this processed survey data set contains participants that will be
# excluded because of the seven-day filter.

write.csv(
  survey_yg_donors,
  file = "data/surveys_processed/survey_YG_donors.csv",
  row.names = FALSE
)
